
*Set working directory
cd ""

****************************************
***Simulations
***2024 Electoral College Forecast
****************************************
*NEED TO INSTALL
	*ssc install clarify
	* install gr0034.pkg  (labmask)
	*ssc install stripplot
*****************************************

set seed 453890886 

local d = 55
local min=`d'-3
local max=`d'+3
forvalues p = `min'(1)`max' {

use "Forecast2024_Replication.dta", clear

local forecastyear = 2024
replace Cwt_`p'_d_ = Cwt_`p'_d_*-1 if republican_incumbent ==1

*As discussed in Appendix 2.1, the variance of economic conditions in 2020 was almost 10X previous years due to COVID. We do not want these extreme values unduly influencing future forecasts. Thus, we recode the sd of 2020 economic conditions to equal the average of the three previous elections. 
forvalues i = 2008(4)2020 {
      sum Cwt_`p'_d_ if year==`i'
      local sd`i' = `r(sd)'
      local mean`i' = `r(mean)'
	  di `sd`i''
}

local avgsd = (`sd2008'+`sd2012'+`sd2016')/3

egen stdecon2020_`p' = std(Cwt_`p'_d_) if year==2020, mean(`mean2020') sd(`avgsd')
sum stdecon2020_`p'
replace Cwt_`p'_d_ = stdecon2020_`p' if year==2020

local sims=10000
	
* Biden forecast, so do not adjust for Harris' approval ratings

* Makes sure only necessary years included:
drop if year <1976

* generate constant to be added to variable matrix
gen cons =1

* order data for matrix later (DV first, ignore later)
order  PerDem2Party lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons

	mkmat lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons if year == `forecastyear', matrix(Variables) rownames(State)

	*Save RMSE as scalar to use later
	*the regression needs to be set year<forecastyear
reg PerDem2Party lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state if year>1976 & year<`forecastyear'
	scalar rmse = `e(rmse)'

	* create simulations with the same variables (lag_ instead of l.)
	*AS ABOVE, REGRESSION NEEDS TO BE SET TO YEAR<forecastyear
	*ssc install clarify
	estsimp reg PerDem2Party lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state if year>1976 & year<`forecastyear', sims(`sims')
	* drop variables so we can name the columns the simulations exactly like the name of the variables (avoids errors by naming them b1 etc). the data is not needed anymore after the simulations

	drop cons lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state 

* rename the column names so column and row names are the same.

	rename b1 lag_PerDem2Party_dev
	rename b2 pres_approval_`forecastyear'
	rename b3 Cwt_`p'_d_
	rename b4 Anderson
	rename b5 Perot2
	rename b6 democrat_p_state
	rename b7 lag_democrat_p_state
	rename b8 democrat_vp_state
	rename b9 cons
 
* make sure the number of rows does not exceed the number of simulations (otherwise it will be part of the Parameter matrix)

	drop if cons ==.

* create matrix based on the simulations, the columns are named exactly like the variables
	set matsize  11000
	mkmat lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons, matrix(Parameters) 
	
* multiply both matrices
	matrix define Uncertainty=Parameters*Variables'

* change new matrix to data and name the variables according to the state names
	svmat Uncertainty, names(col)

*drop all other variables
	drop lag_PerDem2Party_dev-b10

*Make sure state order is identical
	order Alabama
	order Arizona, before(Arkansas)
	order Delaware, before(District_of_Columbia)
	order Iowa, after(Indiana)
	order Maine, before(Massachusetts)
	order Maryland, before(Massachusetts)
	order Mississippi, before(Missouri)
	order Nebraska, after(Montana)
	order Nevada, after(Nebraska)
	order New_Hampshire, after(Nevada)
	order New_Jersey, after(New_Hampshire)
	order New_Mexico, after(New_Jersey)
	order New_York, after(New_Mexico)
	order Vermont, after(Utah)
	order West_Virginia, after(Washington)
	
*Incoporate forecasting error
*Allow seed to vary across different parameter weight models
*RMSE STORED AS SCALAR ABOVE
*generate normally distributed variable w/ sd = rmse
	local sd = rmse

*generate a local within range of correlated error
*potential range = ~50% common variance - ~95% common variance
local u = (0.97 - 0.7)*runiform()+ .7
di `u'
*set mean to 0
matrix m = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) 
///
*set sd to sd set above (based on RMSE)
matrix sd = (`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd',`sd') 
///
*gen matrix of correlated errors
mat def c = J(51,51,`u') 
local rowcount = 1
forval diag = 1/51 {
          matrix c[`diag',`diag'] = 1
}


*generate corrleated errors
corr2data E_Alabama E_Alaska E_Arizona E_Arkansas E_California E_Colorado E_Connecticut E_Delaware E_District_of_Columbia E_Florida E_Georgia E_Hawaii E_Idaho E_Illinois E_Indiana E_Iowa E_Kansas E_Kentucky E_Louisiana E_Maine E_Maryland E_Massachusetts E_Michigan E_Minnesota E_Mississippi E_Missouri E_Montana E_Nebraska E_Nevada E_New_Hampshire E_New_Jersey E_New_Mexico E_New_York E_North_Carolina E_North_Dakota E_Ohio E_Oklahoma E_Oregon E_Pennsylvania E_Rhode_Island E_South_Carolina E_South_Dakota E_Tennessee E_Texas E_Utah E_Vermont E_Virginia E_Washington E_West_Virginia E_Wisconsin E_Wyoming, n(10000) corr(c) means(m) sds(sd) cstorage(full)


*Add forecast error to yhats (which already incorporate model error)
* Not adding in 2024 because redundant; + forecasterror 
foreach var of varlist Alabama - Wyoming {
		gen `var'_forecast = `var' + E_`var'
	}
*******************

*Save created files in a separate folder "SimulationData"
save "SimulationData\B_Uncertainty_estimates_`forecastyear'_`p'.dta",  replace

* Create a Matrix for the electoral college votes 

	use "Electoral_Votes_Final.dta", clear
	

* Only keep numbers for year
local forecastyear = 2024
	keep if year ==`forecastyear'
	
* Insure state order the same across datasets. 
	keep  state_initnum Votes
*Added 5 for the nebraska maine district EC allocations
	egen total_EC=sum(Votes)
	gen cut_off =(total_EC+5)/2+1 


* create long matrix for total available EC votes
	mkmat state_initnum Votes, matrix(EC_votes)

	keep cut_off 
	keep in 1
	*Save created files in a separate folder "SimulationData"
	save "SimulationData\B_total_EC_`forecastyear'_`p'.dta", replace


* create matrix of all simulations:
	use "SimulationData\B_Uncertainty_estimates_`forecastyear'_`p'", clear

* drop all unecessary variables:
*	drop Alabama-forecasterror
	drop Alabama-E_Wyoming
* rename the forecast variables to state variables
	rename *_forecast *

* create a variable indicate forecasted Dem win

	foreach var of varlist Alabama-Wyoming {
		gen `var'_demwin =1 if `var' >50
		replace `var'_demwin =0 if `var' <=50
		label var `var'_demwin "Democrats win 2Party Vote in `var'"
	}	
	
* create matrix for state_simulations. only 0 and 1 included
	mkmat Alabama_demwin -Wyoming_demwin, matrix(state_simulations)

* multiply both matrices:
	mat define all = state_simulations*EC_votes

*convert them back to data
	svmat all

	
* add cut-off variable
	append using  "SimulationData\B_total_EC_`forecastyear'_`p'.dta"

	gsort- cut_off
	replace cut_off = cut_off[1]
	
	drop if all2 ==.
* all1 is the multiplication of state_initnum, all2 is the multiplication of all EC votes the Dems should receive


* Add the district state electoral adjustments here
* 2020 Elections Data Source: 
*https://www.dailykos.com/stories/2018/2/21/1742660/-The-ultimate-Daily-Kos-Elections-guide-to-all-of-our-data-sets
* Daily Kos Elections statewide election results by congressional districts used from 2012-2020
* Accessed: 6/6/24

* Create vars for previous year values statewide and by district
generate PY_Maine = ((435072)/(435072+360737))*100
generate PY_Maine_D1 = ((266376)/(266376+164045))*100
generate PY_Maine_D2 = ((168696)/(168696+196692))*100

generate PY_Nebraska = ((374583)/(374583+556846))*100
generate PY_Nebraska_D1 = ((132261)/(132261+180290))*100
generate PY_Nebraska_D2 = ((176468)/(176468+154377))*100
generate PY_Nebraska_D3 = ((65854)/(65854+222179))*100

* calculate statewide shift and apply to each district
generate Maine_shift = Maine - PY_Maine
generate Maine_D1 = Maine_shift + PY_Maine_D1
generate Maine_D2 = Maine_shift + PY_Maine_D2

generate Nebraska_shift = Nebraska - PY_Nebraska
generate Nebraska_D1 = Nebraska_shift + PY_Nebraska_D1
generate Nebraska_D2 = Nebraska_shift + PY_Nebraska_D2
generate Nebraska_D3 = Nebraska_shift + PY_Nebraska_D3

* create var indicating if demwin
gen Maine_D1_demwin= 1 if Maine_D1 > 50
replace Maine_D1_demwin =0 if Maine_D1 <=50

gen Maine_D2_demwin= 1 if Maine_D2 > 50
replace Maine_D2_demwin =0 if Maine_D2 <=50

gen Nebraska_D1_demwin= 1 if Nebraska_D1 > 50
replace Nebraska_D1_demwin =0 if Nebraska_D1 <=50

gen Nebraska_D2_demwin= 1 if Nebraska_D2 > 50
replace Nebraska_D2_demwin =0 if Nebraska_D2 <=50

gen Nebraska_D3_demwin= 1 if Nebraska_D3 > 50
replace Nebraska_D3_demwin =0 if Nebraska_D3 <=50

* create all3 that adds the nebraska and maine results
gen all3 = all2
replace all3 = all2 + 1 if Maine_D1_demwin == 1
replace all3 = all3 + 1 if Maine_D2_demwin == 1

replace all3 = all3 + 1 if Nebraska_D1_demwin == 1
replace all3 = all3 + 1 if Nebraska_D2_demwin == 1
replace all3 = all3 + 1 if Nebraska_D3_demwin == 1


* create a variable indicating if Dems make at least 270 or more
	gen demwin= 1 if all3 >= cut_off
	replace demwin=0 if all3 < cut_off

	label define demwin 1 "Democrats win EC" 0 "Republicans win EC"
	label values demwin demwin 

	tab demwin
	gen parameter = `p'
*Save created files in a separate folder "SimulationData"
save "SimulationData\B_histogram_data_`forecastyear'_`p'.dta", replace
}

use "SimulationData\B_histogram_data_2024_`min'.dta", clear
local min2 = `min'+1
forvalues p = `min2'(1)`max' {
append using "SimulationData\B_histogram_data_2024_`p'.dta"
}
*Save created files in a separate folder "SimulationData"
save "SimulationData\B_histogram_data_2024_all_`sims'.dta", replace

****************************************
****************************************
****************************************
*Percent of Simulations with a Biden victory
****************************************
tab demwin
****************************************
****************************************
****************************************

